In [2]:
import pyspark 
sc = pyspark.SparkContext('local[*]')
# do something to prove it works
rdd = sc.parallelize(range(1000))
rdd.takeSample(False, 5)
Out[2]:
[102, 700, 423, 396, 90]
In [3]:
sc
Out[3]:

SparkContext

Spark UI

Version
v2.4.3
Master
local[*]
AppName
pyspark-shell
In [5]:
%matplotlib inline

import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.broken_barh([(110, 30), (150, 10)], (10, 9), facecolors='tab:blue')
ax.broken_barh([(10, 50), (100, 20), (130, 10)], (20, 9),
               facecolors=('tab:orange', 'tab:green', 'tab:red'))
ax.set_ylim(5, 35)
ax.set_xlim(0, 200)
ax.set_xlabel('seconds since start')
ax.set_yticks([15, 25])
ax.set_yticklabels(['Bill', 'Jim'])
ax.grid(True)
ax.annotate('race interrupted', (61, 25),
            xytext=(0.8, 0.9), textcoords='axes fraction',
            arrowprops=dict(facecolor='black', shrink=0.05),
            fontsize=16,
            horizontalalignment='right', verticalalignment='top')

plt.show()

Grágico de baras com matplotlib

In [7]:
%matplotlib inline

import matplotlib.pyplot as plt; plt.rcdefaults()
import numpy as np
import matplotlib.pyplot as plt

objects = ('Python', 'C++', 'Java', 'Perl', 'Scala', 'Lisp')
y_pos = np.arange(len(objects))
performance = [10,8,6,4,2,1]

plt.bar(y_pos, performance, align='center', alpha=0.5)
plt.xticks(y_pos, objects)
plt.ylabel('Usage')
plt.title('Programming language usage')

plt.show()

Seaborn

In [9]:
import seaborn as sns
sns.set(style="ticks")

# Load the example dataset for Anscombe's quartet
df = sns.load_dataset("anscombe")

# Show the results of a linear regression within each dataset
sns.lmplot(x="x", y="y", col="dataset", hue="dataset", data=df,
           col_wrap=2, ci=None, palette="muted", height=4,
           scatter_kws={"s": 50, "alpha": 1})
Out[9]:
<seaborn.axisgrid.FacetGrid at 0x7fd222517b38>
In [3]:
!pip install plotly --upgrade --quiet
In [10]:
%load_ext autoreload
%autoreload 2
In [19]:
import plotly.offline as py
py.init_notebook_mode(connected=False)
In [20]:
import pandas as pd
import numpy as np
import plotly.offline as py
import plotly.graph_objs as go
import json

py.init_notebook_mode(connected=False)

izip = zip

df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/globe_contours.csv')
df.head()

contours = []

scl = ['rgb(213,62,79)','rgb(244,109,67)','rgb(253,174,97)',\
       'rgb(254,224,139)','rgb(255,255,191)','rgb(230,245,152)',\
       'rgb(171,221,164)','rgb(102,194,165)','rgb(50,136,189)']

def pairwise(iterable):
    a = iter(iterable)
    return izip(a, a)

i=0
for lat, lon in pairwise(df.columns):
    contours.append( dict(
        type = 'scattergeo',
        lon = df[lon],
        lat = df[lat],
        mode = 'lines',
        line = dict(
            width = 2,
            color = scl[i]
        )
    ) )
    i = 0 if i+1 >= len(df.columns)/4 else i+1
    
layout = dict(
        margin = dict( t = 0, l = 0, r = 0, b = 0 ),
        showlegend = False,         
        geo = dict(
            showland = True,
            showlakes = True,
            showcountries = True,
            showocean = True,
            countrywidth = 0.5,
            landcolor = 'rgb(230, 145, 56)',
            lakecolor = 'rgb(0, 255, 255)',
            oceancolor = 'rgb(0, 255, 255)',
            projection = dict( 
                type = 'orthographic',
                rotation = dict(lon = 0, lat = 0, roll = 0 )            
            ),
            lonaxis = dict( 
                showgrid = True,
                gridcolor = 'rgb(102, 102, 102)',
                gridwidth = 0.5
            ),
            lataxis = dict( 
                showgrid = True,
                gridcolor = 'rgb(102, 102, 102)',
                gridwidth = 0.5
            )
        )
    )

sliders = []

lon_range = np.arange(-180, 180, 10)
lat_range = np.arange(-90, 90, 10)

sliders.append( 
    dict(
        active = len(lon_range)/2,
        currentvalue = {"prefix": "Longitude: "},
        pad = {"t": 0},
        steps = [{
                'method':'relayout', 
                'label':str(i),
                'args':['geo.projection.rotation.lon', i]} for i in lon_range]
    )      
)

sliders.append( 
    dict(
        active = len(lat_range)/2,
        currentvalue = {"prefix": "Latitude: "},
        pad = {"t": 100},
        steps = [{
                'method':'relayout', 
                'label':str(i),
                'args':['geo.projection.rotation.lat', i]} for i in lat_range]
    )      
)

projections = [ "equirectangular", "mercator", "orthographic", "natural earth","kavrayskiy7", 
               "miller", "robinson", "eckert4", "azimuthal equal area","azimuthal equidistant", 
               "conic equal area", "conic conformal", "conic equidistant", "gnomonic", "stereographic", 
               "mollweide", "hammer", "transverse mercator", "albers usa", "winkel tripel" ]

buttons = [ dict( args=['geo.projection.type', p], label=p, method='relayout' ) for p in projections ]

annot = list([ dict( x=0.1, y=0.8, text='Projection', yanchor='bottom', 
                    xref='paper', xanchor='right', showarrow=False )])

# Update Layout Object

layout[ 'updatemenus' ] = list([ dict( x=0.1, y=0.8, buttons=buttons, yanchor='top' )])

layout[ 'annotations' ] = annot

layout[ 'sliders' ] = sliders
fig = dict( data=contours, layout=layout )
py.iplot( fig)
In [21]:
import plotly.express as px
iris = px.data.iris()
fig = px.scatter(iris, x="sepal_width", y="sepal_length")
fig.show()
In [22]:
import plotly.graph_objects as go

fig = go.Figure(data=go.Scatter(
    x=[1, 2, 3, 4],
    y=[10, 11, 12, 13],
    mode='markers',
    marker=dict(size=[40, 60, 80, 100],
                color=[0, 1, 2, 3])
))

fig.show()